# ------------------------------------------------------------------------------
# Build troponin outcomes
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bsub -q big -R "rusage[mem=10000]" bash 03_build_trop_outcomes.sh
# ------------------------------------------------------------------------------

# Setup ------------------------------------------------------------------------
library(yaml) # read_yaml absolute filepaths
library(data.table)
library(here) # here() relative filepaths
library(testit) # assert function
library(tidyverse)
library(glue)
library(lubridate)
library(reticulate)

u <- modules::use(here::here("lib", "util.R"))
source_python(here::here("lib", "python_util.py"))

# Load Data --------------------------------------------------------------------
message("Loading data...")
paths <- read_yaml(here::here("lib", "filepaths.yml"))

# Pre-built long-term troponin outcomes (one max-troponin column per window).
troponin <- read_csv(paths$cohort$troponin)

# Cohort encounters; `dummydate` is used as `enc_date` from here on.
cohort <- read_csv(paths$cohort$ids_flags) %>%
  rename(enc_date = dummydate)

# Date span covered by the cohort. `na.rm = TRUE` keeps a single missing
# encounter date from turning both bounds into NA, which would make the range
# filter below silently drop every lab row.
min_date <- min(cohort$enc_date, na.rm = TRUE) %>% date()
max_date <- max(cohort$enc_date, na.rm = TRUE) %>% date()

# Raw troponin labs, restricted to the cohort's date span and to the columns
# used later in this script.
troponin_all <- read_parquet_file(paths$raw_cohort$troponin_labs) %>%
  mutate(lab_date = date(lab_date)) %>%
  filter((lab_date >= min_date) & (lab_date <= max_date)) %>%
  select(ptid, lab_date, lab_value_raw, lab_value_mod)

# Build Vars -------------------------------------------------------------------
# For each follow-up window: zero-fill the pre-built max-troponin column, derive
# threshold indicators from it, and give all five columns window-specific names.
for (window in c(30, 60, 180, 365)) {
  message("Building outcome vars for ", window, " day window")
  padded_window <- str_pad(window, 3, "left", "0")

  # Name of this window's column in the pre-built outcomes table.
  source_col <- glue("max_troponin-start_date_p1-start_date_p{window}")

  # Temporary working names and the final names they are mapped to, in order.
  working_cols <- c(
    "troponin", "max_trop_pos", "max_trop_0.05", "max_trop_0.1",
    "max_trop_0.5"
  )
  final_cols <- c(
    glue("max_troponin_{padded_window}"),
    glue("max_trop_{padded_window}_pos"),
    glue("max_trop_{padded_window}_0.05"),
    glue("max_trop_{padded_window}_0.1"),
    glue("max_trop_{padded_window}_0.5")
  )

  troponin <- setnames(troponin, source_col, "troponin")
  troponin <- mutate(
    troponin,
    # Missing values are treated as 0, so every indicator below is FALSE
    # for those rows.
    troponin = replace_na(troponin, 0),
    max_trop_pos = troponin > 0,
    max_trop_0.05 = troponin > 0.05,
    max_trop_0.1 = troponin > 0.1,
    max_trop_0.5 = troponin > 0.5
  )
  troponin <- setnames(troponin, working_cols, final_cols)
}

# Save Long-Term ---------------------------------------------------------------
# Write the long-term outcome table to the configured analysis path.
save_fp <- paths$analysis$troponin
message("Saving long-term troponin to ", save_fp, "...")
troponin %>%
  write_rds(save_fp)

# Same-day troponin ------------------------------------------------------------
message("Matching tn labs to same-day encounters...")
# Max troponin per ED encounter among labs drawn on the encounter date.
# - inner_join: rows without a match on `ptid` could never pass the date filter,
#   so there is no need to materialise them as a full join would.
# - NA lab values are dropped before aggregating; otherwise one NA lab would
#   make the whole encounter's max NA and it would later be grouped as "None"
#   despite having real same-day values.
# - `date(enc_date)` mirrors how the cohort date bounds and `lab_date` are
#   normalised above. NOTE(review): it is a no-op if `enc_date` is already a
#   Date; confirm whether the column can carry a time component.
joined_df <- cohort %>%
  inner_join(troponin_all, by = "ptid") %>%
  filter(!is.na(lab_value_mod), date(enc_date) == lab_date) %>%
  group_by(ed_enc_id) %>%
  summarize(max_trop = max(lab_value_mod)) %>%
  ungroup()

# Attach the per-encounter max back onto the full cohort; encounters with no
# usable same-day lab keep `max_trop = NA`.
tn_cohort <- cohort %>%
  u$safe_left_join(joined_df)

# Tn Groups --------------------------------------------------------------------
message("Grouping by troponin level...")
tn_levels <- c("None", "0", "(0,0.05]", "(0.05,0.1]", "(0.1,0.5]", ">0.5")

# Bucket the same-day max into an ordered set of labels. Thresholds are checked
# from the top down; anything left that is neither missing, above 0.05, nor
# exactly 0 lands in the "(0,0.05]" bucket.
tn_cohort <- tn_cohort %>%
  mutate(
    tn_group = factor(
      case_when(
        is.na(max_trop) ~ "None",
        max_trop > 0.5 ~ ">0.5",
        max_trop > 0.1 ~ "(0.1,0.5]",
        max_trop > 0.05 ~ "(0.05,0.1]",
        max_trop == 0 ~ "0",
        TRUE ~ "(0,0.05]"
      ),
      levels = tn_levels
    )
  )

message("Saving same-day troponin data.frame...")
# Keep only the encounter id plus the same-day max and its group, exposed
# under their `_sameday` names.
save_tn <- tn_cohort %>%
  select(
    ed_enc_id,
    maxtrop_sameday = max_trop,
    tn_group_sameday = tn_group
  )

write_rds(save_tn, paths$analysis$sameday_tn)

message("Done.")
